import pandas as pd
import warnings
# Silence every warning for the rest of the session so library warnings do not
# clutter the cell output.
# NOTE(review): this is a blanket filter -- it also hides any warning pandas
# might raise while parsing the CSV below; consider narrowing it by category
# or module once the noisy source is known.
warnings.filterwarnings('ignore')
# Load the credit training data from a path relative to the current working
# directory (the file name suggests it has already been cleaned upstream).
df = pd.read_csv('credit_train_cleaned.csv')
# Preview the first five rows; as a bare expression this is only rendered when
# it is the last statement of a notebook cell.
df.head()
| | Loan Status | Current Loan Amount | Term | Credit Score | Annual Income | Years in current job | Home Ownership | Purpose | Monthly Debt | Years of Credit History | Months since last delinquent | Number of Open Accounts | Number of Credit Problems | Current Credit Balance | Maximum Open Credit | Bankruptcies | Tax Liens |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Fully Paid | 445412.0 | Short Term | 709.0 | 1167493.0 | 8 years | Home Mortgage | Home Improvements | 5214.74 | 17.2 | 32.0 | 6.0 | 1.0 | 228190.0 | 416746.0 | 1.0 | 0.0 |
| 1 | Fully Paid | 262328.0 | Short Term | 732.0 | 1219961.5 | 10+ years | Home Mortgage | Debt Consolidation | 33295.98 | 21.1 | 8.0 | 35.0 | 0.0 | 229976.0 | 850784.0 | 0.0 | 0.0 |
| 2 | Fully Paid | 789250.0 | Short Term | 741.0 | 2231892.0 | 8 years | Own Home | Debt Consolidation | 29200.53 | 14.9 | 29.0 | 18.0 | 1.0 | 297996.0 | 750090.0 | 0.0 | 0.0 |
| 3 | Fully Paid | 347666.0 | Long Term | 721.0 | 806949.0 | 3 years | Own Home | Debt Consolidation | 8741.90 | 12.0 | 32.0 | 9.0 | 0.0 | 256329.0 | 386958.0 | 0.0 | 0.0 |
| 4 | Fully Paid | 176220.0 | Short Term | 732.0 | 1219961.5 | 5 years | Rent | Debt Consolidation | 20639.70 | 6.1 | 32.0 | 15.0 | 0.0 | 253460.0 | 427174.0 | 0.0 | 0.0 |
from dataprep.eda import create_report
import streamlit as st
NumExpr defaulting to 8 threads.
# Build dataprep's automated EDA report over the whole DataFrame.
# NOTE(review): the result is bound to `fig`, but judging by the output that
# follows it is a multi-section report (overview plus per-variable statistics)
# rather than a single figure.
fig = create_report(df)
# Bare expression: displays the report when evaluated as the last statement of
# a notebook cell.
fig
| Number of Variables | 17 |
|---|---|
| Number of Rows | 81994 |
| Missing Cells | 0 |
| Missing Cells (%) | 0.0% |
| Duplicate Rows | 0 |
| Duplicate Rows (%) | 0.0% |
| Total Size in Memory | 33.9 MB |
| Average Row Size in Memory | 433.9 B |
| Categorical | 5 |
|---|---|
| Numerical | 12 |
categorical
| Distinct Count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Memory Size | 5.9 MB |
| Mean | 10.276 |
|---|---|
| Standard Deviation | 0.447 |
| Median | 10 |
| Minimum | 10 |
| Maximum | 11 |
| 1st row | Fully Paid |
|---|---|
| 2nd row | Fully Paid |
| 3rd row | Fully Paid |
| 4th row | Fully Paid |
| 5th row | Fully Paid |
| Count | 760580 |
|---|---|
| Lowercase Letter | 596592 |
| Space Separator | 81994 |
| Uppercase Letter | 163988 |
| Dash Punctuation | 0 |
| Decimal Number | 0 |
numerical
| Distinct Count | 21820 |
|---|---|
| Unique (%) | 26.6% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 364869.3477 |
| Minimum | 10802 |
| Maximum | 789250 |
| Zeros | 0 |
| Zeros (%) | 0.0% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 10802 |
|---|---|
| 5-th Percentile | 77462 |
| Q1 | 179256 |
| Median | 307780 |
| Q3 | 519458.5 |
| 95-th Percentile | 789250 |
| Maximum | 789250 |
| Range | 778448 |
| IQR | 340202.5 |
| Mean | 364869.3477 |
|---|---|
| Standard Deviation | 231364.1979 |
| Variance | 5.3529e+10 |
| Sum | 2.9917e+10 |
| Skewness | 0.6528 |
| Kurtosis | -0.7801 |
| Coefficient of Variation | 0.6341 |
categorical
| Distinct Count | 2 |
|---|---|
| Unique (%) | 0.0% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Memory Size | 5.8 MB |
| Mean | 9.7486 |
|---|---|
| Standard Deviation | 0.4338 |
| Median | 10 |
| Minimum | 9 |
| Maximum | 10 |
| 1st row | Short Term |
|---|---|
| 2nd row | Short Term |
| 3rd row | Short Term |
| 4th row | Long Term |
| 5th row | Short Term |
| Count | 717336 |
|---|---|
| Lowercase Letter | 553348 |
| Space Separator | 81994 |
| Uppercase Letter | 163988 |
| Dash Punctuation | 0 |
| Decimal Number | 0 |
numerical
| Distinct Count | 167 |
|---|---|
| Unique (%) | 0.2% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 721.8107 |
| Minimum | 585 |
| Maximum | 751 |
| Zeros | 0 |
| Zeros (%) | 0.0% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 585 |
|---|---|
| 5-th Percentile | 669 |
| Q1 | 715 |
| Median | 731 |
| Q3 | 738 |
| 95-th Percentile | 748 |
| Maximum | 751 |
| Range | 166 |
| IQR | 23 |
| Mean | 721.8107 |
|---|---|
| Standard Deviation | 25.0821 |
| Variance | 629.1124 |
| Sum | 5.9184e+07 |
| Skewness | -1.8412 |
| Kurtosis | 4.1478 |
| Coefficient of Variation | 0.03475 |
numerical
| Distinct Count | 35519 |
|---|---|
| Unique (%) | 43.3% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 1.333e+06 |
| Minimum | 76627 |
| Maximum | 3.6475e+07 |
| Zeros | 0 |
| Zeros (%) | 0.0% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 76627 |
|---|---|
| 5-th Percentile | 559740 |
| Q1 | 934800 |
| Median | 1.22e+06 |
| Q3 | 1.5005e+06 |
| 95-th Percentile | 2.6189e+06 |
| Maximum | 3.6475e+07 |
| Range | 3.6399e+07 |
| IQR | 565725 |
| Mean | 1.333e+06 |
|---|---|
| Standard Deviation | 817537.4739 |
| Variance | 6.6837e+11 |
| Sum | 1.0929e+11 |
| Skewness | 6.9318 |
| Kurtosis | 139.0272 |
| Coefficient of Variation | 0.6133 |
categorical
| Distinct Count | 11 |
|---|---|
| Unique (%) | 0.0% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Memory Size | 5.7 MB |
| Mean | 7.7195 |
|---|---|
| Standard Deviation | 1.0166 |
| Median | 7 |
| Minimum | 6 |
| Maximum | 9 |
| 1st row | 8 years |
|---|---|
| 2nd row | 10+ years |
| 3rd row | 8 years |
| 4th row | 3 years |
| 5th row | 5 years |
| Count | 397898 |
|---|---|
| Lowercase Letter | 397898 |
| Space Separator | 88749 |
| Uppercase Letter | 0 |
| Dash Punctuation | 0 |
| Decimal Number | 110771 |
categorical
| Distinct Count | 4 |
|---|---|
| Unique (%) | 0.0% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Memory Size | 5.8 MB |
| Mean | 8.7566 |
|---|---|
| Standard Deviation | 4.2836 |
| Median | 8 |
| Minimum | 4 |
| Maximum | 13 |
| 1st row | Home Mortgage |
|---|---|
| 2nd row | Home Mortgage |
| 3rd row | Own Home |
| 4th row | Own Home |
| 5th row | Rent |
| Count | 670654 |
|---|---|
| Lowercase Letter | 541164 |
| Space Separator | 47333 |
| Uppercase Letter | 129490 |
| Dash Punctuation | 0 |
| Decimal Number | 0 |
categorical
| Distinct Count | 16 |
|---|---|
| Unique (%) | 0.0% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Memory Size | 6.4 MB |
| Mean | 16.3894 |
|---|---|
| Standard Deviation | 3.9258 |
| Median | 18 |
| Minimum | 5 |
| Maximum | 20 |
| 1st row | Home Improvements |
|---|---|
| 2nd row | Debt Consolidation |
| 3rd row | Debt Consolidation |
| 4th row | Debt Consolidation |
| 5th row | Debt Consolidation |
| Count | 1267849 |
|---|---|
| Lowercase Letter | 1117361 |
| Space Separator | 75443 |
| Uppercase Letter | 150488 |
| Dash Punctuation | 0 |
| Decimal Number | 0 |
numerical
| Distinct Count | 65760 |
|---|---|
| Unique (%) | 80.2% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 18322.9923 |
| Minimum | 0 |
| Maximum | 229057.92 |
| Zeros | 64 |
| Zeros (%) | 0.1% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 0 |
|---|---|
| 5-th Percentile | 3690.427 |
| Q1 | 10117.3575 |
| Median | 16074.475 |
| Q3 | 23810.3725 |
| 95-th Percentile | 40226.933 |
| Maximum | 229057.92 |
| Range | 229057.92 |
| IQR | 13693.015 |
| Mean | 18322.9923 |
|---|---|
| Standard Deviation | 12022.0044 |
| Variance | 1.4453e+08 |
| Sum | 1.5024e+09 |
| Skewness | 1.8271 |
| Kurtosis | 8.2446 |
| Coefficient of Variation | 0.6561 |
numerical
| Distinct Count | 506 |
|---|---|
| Unique (%) | 0.6% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 18.2965 |
| Minimum | 3.6 |
| Maximum | 70.5 |
| Zeros | 0 |
| Zeros (%) | 0.0% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 3.6 |
|---|---|
| 5-th Percentile | 9 |
| Q1 | 13.5 |
| Median | 17 |
| Q3 | 21.8 |
| 95-th Percentile | 31.8 |
| Maximum | 70.5 |
| Range | 66.9 |
| IQR | 8.3 |
| Mean | 18.2965 |
|---|---|
| Standard Deviation | 7.0437 |
| Variance | 49.6142 |
| Sum | 1.5002e+06 |
| Skewness | 1.0767 |
| Kurtosis | 1.7325 |
| Coefficient of Variation | 0.385 |
numerical
| Distinct Count | 116 |
|---|---|
| Unique (%) | 0.1% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 33.3969 |
| Minimum | 0 |
| Maximum | 176 |
| Zeros | 177 |
| Zeros (%) | 0.2% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 0 |
|---|---|
| 5-th Percentile | 9 |
| Q1 | 32 |
| Median | 32 |
| Q3 | 32 |
| 95-th Percentile | 67 |
| Maximum | 176 |
| Range | 176 |
| IQR | 0 |
| Mean | 33.3969 |
|---|---|
| Standard Deviation | 14.9462 |
| Variance | 223.3893 |
| Sum | 2.7383e+06 |
| Skewness | 0.9548 |
| Kurtosis | 2.1721 |
| Coefficient of Variation | 0.4475 |
numerical
| Distinct Count | 50 |
|---|---|
| Unique (%) | 0.1% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 11.1127 |
| Minimum | 0 |
| Maximum | 56 |
| Zeros | 2 |
| Zeros (%) | 0.0% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 0 |
|---|---|
| 5-th Percentile | 5 |
| Q1 | 8 |
| Median | 10 |
| Q3 | 14 |
| 95-th Percentile | 20 |
| Maximum | 56 |
| Range | 56 |
| IQR | 6 |
| Mean | 11.1127 |
|---|---|
| Standard Deviation | 4.9707 |
| Variance | 24.7083 |
| Sum | 911176 |
| Skewness | 1.1358 |
| Kurtosis | 2.4375 |
| Coefficient of Variation | 0.4473 |
numerical
| Distinct Count | 14 |
|---|---|
| Unique (%) | 0.0% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 0.1615 |
| Minimum | 0 |
| Maximum | 15 |
| Zeros | 70985 |
| Zeros (%) | 86.6% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 0 |
|---|---|
| 5-th Percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0 |
| 95-th Percentile | 1 |
| Maximum | 15 |
| Range | 15 |
| IQR | 0 |
| Mean | 0.1615 |
|---|---|
| Standard Deviation | 0.4732 |
| Variance | 0.2239 |
| Sum | 13238 |
| Skewness | 4.9723 |
| Kurtosis | 52.1305 |
| Coefficient of Variation | 2.9307 |
numerical
| Distinct Count | 32727 |
|---|---|
| Unique (%) | 39.9% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 293207.5873 |
| Minimum | 0 |
| Maximum | 1.2987e+07 |
| Zeros | 478 |
| Zeros (%) | 0.6% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 0 |
|---|---|
| 5-th Percentile | 30241.35 |
| Q1 | 113316 |
| Median | 209912 |
| Q3 | 366947 |
| 95-th Percentile | 759678.9 |
| Maximum | 1.2987e+07 |
| Range | 1.2987e+07 |
| IQR | 253631 |
| Mean | 293207.5873 |
|---|---|
| Standard Deviation | 354812.7199 |
| Variance | 1.2589e+11 |
| Sum | 2.4041e+10 |
| Skewness | 8.2736 |
| Kurtosis | 145.7713 |
| Coefficient of Variation | 1.2101 |
numerical
| Distinct Count | 44594 |
|---|---|
| Unique (%) | 54.4% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 792309.5881 |
| Minimum | 0 |
| Maximum | 1.5397e+09 |
| Zeros | 569 |
| Zeros (%) | 0.7% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 0 |
|---|---|
| 5-th Percentile | 113688.3 |
| Q1 | 280439.5 |
| Median | 477752 |
| Q3 | 798462.5 |
| 95-th Percentile | 1.6795e+06 |
| Maximum | 1.5397e+09 |
| Range | 1.5397e+09 |
| IQR | 518023 |
| Mean | 792309.5881 |
|---|---|
| Standard Deviation | 9.2026e+06 |
| Variance | 8.4687e+13 |
| Sum | 6.4965e+10 |
| Skewness | 122.1399 |
| Kurtosis | 17133.1074 |
| Coefficient of Variation | 11.6149 |
numerical
| Distinct Count | 8 |
|---|---|
| Unique (%) | 0.0% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 0.1132 |
| Minimum | 0 |
| Maximum | 7 |
| Zeros | 73271 |
| Zeros (%) | 89.4% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 0 |
|---|---|
| 5-th Percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0 |
| 95-th Percentile | 1 |
| Maximum | 7 |
| Range | 7 |
| IQR | 0 |
| Mean | 0.1132 |
|---|---|
| Standard Deviation | 0.3444 |
| Variance | 0.1186 |
| Sum | 9284 |
| Skewness | 3.5394 |
| Kurtosis | 18.6402 |
| Coefficient of Variation | 3.0413 |
numerical
| Distinct Count | 12 |
|---|---|
| Unique (%) | 0.0% |
| Missing | 0 |
| Missing (%) | 0.0% |
| Infinite | 0 |
| Infinite (%) | 0.0% |
| Memory Size | 1.3 MB |
| Mean | 0.02806 |
| Minimum | 0 |
| Maximum | 15 |
| Zeros | 80494 |
| Zeros (%) | 98.2% |
| Negatives | 0 |
| Negatives (%) | 0.0% |
| Minimum | 0 |
|---|---|
| 5-th Percentile | 0 |
| Q1 | 0 |
| Median | 0 |
| Q3 | 0 |
| 95-th Percentile | 0 |
| Maximum | 15 |
| Range | 15 |
| IQR | 0 |
| Mean | 0.02806 |
|---|---|
| Standard Deviation | 0.2546 |
| Variance | 0.06484 |
| Sum | 2301 |
| Skewness | 16.0292 |
| Kurtosis | 432.3999 |
| Coefficient of Variation | 9.0738 |